scRNA data

Load packages and data

# Load packages
# library() stops with an error when a package is not installed, whereas
# require() only returns FALSE (with a warning) and lets later code fail.
library(tidyverse)
library(Seurat)
library(SingleR)

# Load data: read the saved R object from calderon/data/main.Rds;
# here::here() builds the path relative to the project root
dat <- readRDS(here::here("calderon/data/main.Rds"))

Data structure of Seurat object

# Class of the loaded object
class(dat)
[1] "Seurat"
attr(,"package")
[1] "SeuratObject"
# Structure of data: compact display of the object's slots and their
# contents (the printed output is long)
str(dat)
Formal class 'Seurat' [package "SeuratObject"] with 13 slots
  ..@ assays      :List of 1
  .. ..$ RNA:Formal class 'Assay' [package "SeuratObject"] with 8 slots
  .. .. .. ..@ counts       :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
  .. .. .. .. .. ..@ i       : int [1:212703167] 92 111 118 260 273 378 462 466 475 511 ...
  .. .. .. .. .. ..@ p       : int [1:547806] 0 247 472 897 1099 1342 1614 1835 2211 2493 ...
  .. .. .. .. .. ..@ Dim     : int [1:2] 23932 547805
  .. .. .. .. .. ..@ Dimnames:List of 2
  .. .. .. .. .. .. ..$ : chr [1:23932] "gfzf" "FBti0060344" "sisRNA:CR46358" "Osi24" ...
  .. .. .. .. .. .. ..$ : chr [1:547805] "exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG" ...
  .. .. .. .. .. ..@ x       : num [1:212703167] 1 1 1 1 2 1 1 1 2 1 ...
  .. .. .. .. .. ..@ factors : list()
  .. .. .. ..@ data         :Formal class 'dgCMatrix' [package "Matrix"] with 6 slots
  .. .. .. .. .. ..@ i       : int [1:212703167] 92 111 118 260 273 378 462 466 475 511 ...
  .. .. .. .. .. ..@ p       : int [1:547806] 0 247 472 897 1099 1342 1614 1835 2211 2493 ...
  .. .. .. .. .. ..@ Dim     : int [1:2] 23932 547805
  .. .. .. .. .. ..@ Dimnames:List of 2
  .. .. .. .. .. .. ..$ : chr [1:23932] "gfzf" "FBti0060344" "sisRNA:CR46358" "Osi24" ...
  .. .. .. .. .. .. ..$ : chr [1:547805] "exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG" ...
  .. .. .. .. .. ..@ x       : num [1:212703167] 3.4 3.4 3.4 3.4 4.07 ...
  .. .. .. .. .. ..@ factors : list()
  .. .. .. ..@ scale.data   : num [1:2000, 1:547805] -0.00507 -0.03863 -0.02815 -0.01328 -0.02461 ...
  .. .. .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. .. .. ..$ : chr [1:2000] "CG12426" "CG5023" "CG7910" "CG4367" ...
  .. .. .. .. .. ..$ : chr [1:547805] "exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG" ...
  .. .. .. ..@ key          : chr "rna_"
  .. .. .. ..@ assay.orig   : NULL
  .. .. .. ..@ var.features : chr [1:2000] "CG7465" "CG7017" "CG7715" "CG11350" ...
  .. .. .. ..@ meta.features:'data.frame':  23932 obs. of  5 variables:
  .. .. .. .. ..$ vst.mean                 : num [1:23932] 4.90e-02 0.00 5.66e-05 8.55e-03 1.52e-01 ...
  .. .. .. .. ..$ vst.variance             : num [1:23932] 7.99e-02 0.00 7.48e-05 1.05e-02 2.71e-01 ...
  .. .. .. .. ..$ vst.variance.expected    : num [1:23932] 7.75e-02 0.00 7.37e-05 1.34e-02 2.90e-01 ...
  .. .. .. .. ..$ vst.variance.standardized: num [1:23932] 1.032 0 1.016 0.782 0.933 ...
  .. .. .. .. ..$ vst.variable             : logi [1:23932] FALSE FALSE FALSE FALSE FALSE FALSE ...
  .. .. .. ..@ misc         : list()
  ..@ meta.data   :'data.frame':    547805 obs. of  24 variables:
  .. ..$ orig.ident        : chr [1:547805] "exp1" "exp1" "exp1" "exp1" ...
  .. ..$ nCount_RNA        : num [1:547805] 346 279 586 292 335 354 287 509 438 313 ...
  .. ..$ nFeature_RNA      : int [1:547805] 247 225 425 202 243 272 221 376 282 227 ...
  .. ..$ rt_bc             : chr [1:547805] "AAGTTGCCAT" "TTAATTATTG" "CAGGTATGGA" "AAGTAGTCAG" ...
  .. ..$ cell              : chr [1:547805] "exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG" ...
  .. ..$ time              : chr [1:547805] "hrs_06_10" "hrs_08_12" "hrs_08_12" "hrs_10_14" ...
  .. ..$ percent.mt        : num [1:547805] 1.445 0.717 1.024 0.342 4.478 ...
  .. ..$ percent.ribo      : num [1:547805] 4.335 5.018 6.485 0.685 5.672 ...
  .. ..$ barn_dro          : int [1:547805] 371 331 630 320 386 380 307 552 492 344 ...
  .. ..$ barn_hu           : int [1:547805] 0 4 0 3 0 0 0 2 1 2 ...
  .. ..$ barn_mus          : int [1:547805] 0 5 0 0 0 1 0 1 1 0 ...
  .. ..$ RNA_snn_res.0.8   : chr [1:547805] "16" "7" "4" "23" ...
  .. ..$ doublet_score     : num [1:547805] 0.0528 0.0722 0.0901 0.0659 0.0848 ...
  .. ..$ predicted_doublet : chr [1:547805] "Singlet" "Singlet" "Singlet" "Singlet" ...
  .. ..$ doublet_cluster   : logi [1:547805] FALSE FALSE FALSE FALSE FALSE FALSE ...
  .. ..$ seurat_clusters   : Factor w/ 25 levels "0","1","2","3",..: 22 12 12 13 12 12 10 22 10 12 ...
  .. ..$ doublet_subcluster: logi [1:547805] FALSE FALSE FALSE FALSE FALSE FALSE ...
  .. ..$ RNA_snn_res.0.08  : Factor w/ 25 levels "0","1","2","3",..: 22 12 12 13 12 12 10 22 10 12 ...
  .. ..$ lasso_age         : num [1:547805] 8.4 11.6 11.3 12.5 12.5 ...
  .. ..$ NNv1_age          : num [1:547805] 7.89 9.94 10.06 12.21 13.74 ...
  .. ..$ NNv2_age          : num [1:547805] 8.12 9.96 11.16 13.83 12.52 ...
  .. ..$ lasso_shift       : num [1:547805] 0 0 0 0 -1.46 ...
  .. ..$ NNv1_shift        : num [1:547805] 0 0 0 0 -0.261 ...
  .. ..$ NNv2_shift        : num [1:547805] 0 0 0 0 -1.48 ...
  ..@ active.assay: chr "RNA"
  ..@ active.ident: Factor w/ 25 levels "0","1","2","3",..: 22 12 12 13 12 12 10 22 10 12 ...
  .. ..- attr(*, "names")= chr [1:547805] "exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG" ...
  ..@ graphs      :List of 2
  .. ..$ RNA_nn :Formal class 'Graph' [package "SeuratObject"] with 7 slots
  .. .. .. ..@ assay.used: chr "RNA"
  .. .. .. ..@ i         : int [1:10956100] 0 1098 1806 2469 2949 5770 6177 6303 6711 7000 ...
  .. .. .. ..@ p         : int [1:547806] 0 22 46 70 94 116 137 157 178 194 ...
  .. .. .. ..@ Dim       : int [1:2] 547805 547805
  .. .. .. ..@ Dimnames  :List of 2
  .. .. .. .. ..$ : chr [1:547805] "exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG" ...
  .. .. .. .. ..$ : chr [1:547805] "exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG" ...
  .. .. .. ..@ x         : num [1:10956100] 1 1 1 1 1 1 1 1 1 1 ...
  .. .. .. ..@ factors   : list()
  .. ..$ RNA_snn:Formal class 'Graph' [package "SeuratObject"] with 7 slots
  .. .. .. ..@ assay.used: chr "RNA"
  .. .. .. ..@ i         : int [1:25102879] 0 99 248 479 876 1098 1524 1806 1870 2353 ...
  .. .. .. ..@ p         : int [1:547806] 0 44 90 134 180 228 274 322 375 424 ...
  .. .. .. ..@ Dim       : int [1:2] 547805 547805
  .. .. .. ..@ Dimnames  :List of 2
  .. .. .. .. ..$ : chr [1:547805] "exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG" ...
  .. .. .. .. ..$ : chr [1:547805] "exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG" ...
  .. .. .. ..@ x         : num [1:25102879] 1 0.0811 0.1111 0.1111 0.2121 ...
  .. .. .. ..@ factors   : list()
  ..@ neighbors   : list()
  ..@ reductions  :List of 2
  .. ..$ pca :Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
  .. .. .. ..@ cell.embeddings           : num [1:547805, 1:50] -1.627 -0.136 0.435 1.609 0.194 ...
  .. .. .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. .. .. ..$ : chr [1:547805] "exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG" ...
  .. .. .. .. .. ..$ : chr [1:50] "PC_1" "PC_2" "PC_3" "PC_4" ...
  .. .. .. ..@ feature.loadings          : num [1:2000, 1:50] 0.01534 0.00381 0.00905 0.0051 0.00235 ...
  .. .. .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. .. .. ..$ : chr [1:2000] "CG7465" "CG7017" "CG7715" "CG11350" ...
  .. .. .. .. .. ..$ : chr [1:50] "PC_1" "PC_2" "PC_3" "PC_4" ...
  .. .. .. ..@ feature.loadings.projected: num[0 , 0 ] 
  .. .. .. ..@ assay.used                : chr "RNA"
  .. .. .. ..@ global                    : logi FALSE
  .. .. .. ..@ stdev                     : num [1:50] 4.82 3.49 3 2.59 2.26 ...
  .. .. .. ..@ key                       : chr "PC_"
  .. .. .. ..@ jackstraw                 :Formal class 'JackStrawData' [package "SeuratObject"] with 4 slots
  .. .. .. .. .. ..@ empirical.p.values     : num[0 , 0 ] 
  .. .. .. .. .. ..@ fake.reduction.scores  : num[0 , 0 ] 
  .. .. .. .. .. ..@ empirical.p.values.full: num[0 , 0 ] 
  .. .. .. .. .. ..@ overall.p.values       : num[0 , 0 ] 
  .. .. .. ..@ misc                      :List of 1
  .. .. .. .. ..$ total.variance: num 578
  .. ..$ umap:Formal class 'DimReduc' [package "SeuratObject"] with 9 slots
  .. .. .. ..@ cell.embeddings           : num [1:547805, 1:2] -5.89 -5.67 -5.5 -4.86 -5.44 ...
  .. .. .. .. ..- attr(*, "scaled:center")= num [1:2] -0.571 0.161
  .. .. .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. .. .. ..$ : chr [1:547805] "exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA" "exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG" ...
  .. .. .. .. .. ..$ : chr [1:2] "UMAP_1" "UMAP_2"
  .. .. .. ..@ feature.loadings          : num[0 , 0 ] 
  .. .. .. ..@ feature.loadings.projected: num[0 , 0 ] 
  .. .. .. ..@ assay.used                : chr "RNA"
  .. .. .. ..@ global                    : logi TRUE
  .. .. .. ..@ stdev                     : num(0) 
  .. .. .. ..@ key                       : chr "UMAP_"
  .. .. .. ..@ jackstraw                 :Formal class 'JackStrawData' [package "SeuratObject"] with 4 slots
  .. .. .. .. .. ..@ empirical.p.values     : num[0 , 0 ] 
  .. .. .. .. .. ..@ fake.reduction.scores  : num[0 , 0 ] 
  .. .. .. .. .. ..@ empirical.p.values.full: num[0 , 0 ] 
  .. .. .. .. .. ..@ overall.p.values       : num[0 , 0 ] 
  .. .. .. ..@ misc                      : list()
  ..@ images      : list()
  ..@ project.name: chr "SeuratProject"
  ..@ misc        : list()
  ..@ version     :Classes 'package_version', 'numeric_version'  hidden list of 1
  .. ..$ : int [1:3] 4 0 2
  ..@ commands    :List of 7
  .. ..$ NormalizeData.RNA       :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
  .. .. .. ..@ name       : chr "NormalizeData.RNA"
  .. .. .. ..@ time.stamp : POSIXct[1:1], format: "2022-03-08 01:25:40"
  .. .. .. ..@ assay.used : chr "RNA"
  .. .. .. ..@ call.string: chr "NormalizeData(data)"
  .. .. .. ..@ params     :List of 5
  .. .. .. .. ..$ assay               : chr "RNA"
  .. .. .. .. ..$ normalization.method: chr "LogNormalize"
  .. .. .. .. ..$ scale.factor        : num 10000
  .. .. .. .. ..$ margin              : num 1
  .. .. .. .. ..$ verbose             : logi TRUE
  .. ..$ FindVariableFeatures.RNA:Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
  .. .. .. ..@ name       : chr "FindVariableFeatures.RNA"
  .. .. .. ..@ time.stamp : POSIXct[1:1], format: "2022-03-08 01:25:49"
  .. .. .. ..@ assay.used : chr "RNA"
  .. .. .. ..@ call.string: chr "FindVariableFeatures(data, selection.method = \"vst\")"
  .. .. .. ..@ params     :List of 12
  .. .. .. .. ..$ assay              : chr "RNA"
  .. .. .. .. ..$ selection.method   : chr "vst"
  .. .. .. .. ..$ loess.span         : num 0.3
  .. .. .. .. ..$ clip.max           : chr "auto"
  .. .. .. .. ..$ mean.function      :function (mat, display_progress)  
  .. .. .. .. ..$ dispersion.function:function (mat, display_progress)  
  .. .. .. .. ..$ num.bin            : num 20
  .. .. .. .. ..$ binning.method     : chr "equal_width"
  .. .. .. .. ..$ nfeatures          : num 2000
  .. .. .. .. ..$ mean.cutoff        : num [1:2] 0.1 8
  .. .. .. .. ..$ dispersion.cutoff  : num [1:2] 1 Inf
  .. .. .. .. ..$ verbose            : logi TRUE
  .. ..$ ScaleData.RNA           :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
  .. .. .. ..@ name       : chr "ScaleData.RNA"
  .. .. .. ..@ time.stamp : POSIXct[1:1], format: "2022-03-08 01:26:11"
  .. .. .. ..@ assay.used : chr "RNA"
  .. .. .. ..@ call.string: chr "ScaleData(FindVariableFeatures(data, selection.method = \"vst\"))"
  .. .. .. ..@ params     :List of 10
  .. .. .. .. ..$ features          : chr [1:2000] "CG7465" "CG7017" "CG7715" "CG11350" ...
  .. .. .. .. ..$ assay             : chr "RNA"
  .. .. .. .. ..$ model.use         : chr "linear"
  .. .. .. .. ..$ use.umi           : logi FALSE
  .. .. .. .. ..$ do.scale          : logi TRUE
  .. .. .. .. ..$ do.center         : logi TRUE
  .. .. .. .. ..$ scale.max         : num 10
  .. .. .. .. ..$ block.size        : num 1000
  .. .. .. .. ..$ min.cells.to.block: num 3000
  .. .. .. .. ..$ verbose           : logi TRUE
  .. ..$ RunPCA.RNA              :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
  .. .. .. ..@ name       : chr "RunPCA.RNA"
  .. .. .. ..@ time.stamp : POSIXct[1:1], format: "2022-03-08 01:32:38"
  .. .. .. ..@ assay.used : chr "RNA"
  .. .. .. ..@ call.string: chr "RunPCA(data)"
  .. .. .. ..@ params     :List of 10
  .. .. .. .. ..$ assay          : chr "RNA"
  .. .. .. .. ..$ npcs           : num 50
  .. .. .. .. ..$ rev.pca        : logi FALSE
  .. .. .. .. ..$ weight.by.var  : logi TRUE
  .. .. .. .. ..$ verbose        : logi TRUE
  .. .. .. .. ..$ ndims.print    : int [1:5] 1 2 3 4 5
  .. .. .. .. ..$ nfeatures.print: num 30
  .. .. .. .. ..$ reduction.name : chr "pca"
  .. .. .. .. ..$ reduction.key  : chr "PC_"
  .. .. .. .. ..$ seed.use       : num 42
  .. ..$ RunUMAP.RNA.pca         :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
  .. .. .. ..@ name       : chr "RunUMAP.RNA.pca"
  .. .. .. ..@ time.stamp : POSIXct[1:1], format: "2022-03-08 01:54:45"
  .. .. .. ..@ assay.used : chr "RNA"
  .. .. .. ..@ call.string: chr "RunUMAP(data, dims = 1:50, n.components = 2)"
  .. .. .. ..@ params     :List of 26
  .. .. .. .. ..$ dims                : int [1:50] 1 2 3 4 5 6 7 8 9 10 ...
  .. .. .. .. ..$ reduction           : chr "pca"
  .. .. .. .. ..$ assay               : chr "RNA"
  .. .. .. .. ..$ slot                : chr "data"
  .. .. .. .. ..$ umap.method         : chr "uwot"
  .. .. .. .. ..$ return.model        : logi FALSE
  .. .. .. .. ..$ n.neighbors         : int 30
  .. .. .. .. ..$ n.components        : num 2
  .. .. .. .. ..$ metric              : chr "cosine"
  .. .. .. .. ..$ learning.rate       : num 1
  .. .. .. .. ..$ min.dist            : num 0.3
  .. .. .. .. ..$ spread              : num 1
  .. .. .. .. ..$ set.op.mix.ratio    : num 1
  .. .. .. .. ..$ local.connectivity  : int 1
  .. .. .. .. ..$ repulsion.strength  : num 1
  .. .. .. .. ..$ negative.sample.rate: int 5
  .. .. .. .. ..$ uwot.sgd            : logi FALSE
  .. .. .. .. ..$ seed.use            : int 42
  .. .. .. .. ..$ angular.rp.forest   : logi FALSE
  .. .. .. .. ..$ densmap             : logi FALSE
  .. .. .. .. ..$ dens.lambda         : num 2
  .. .. .. .. ..$ dens.frac           : num 0.3
  .. .. .. .. ..$ dens.var.shift      : num 0.1
  .. .. .. .. ..$ verbose             : logi TRUE
  .. .. .. .. ..$ reduction.name      : chr "umap"
  .. .. .. .. ..$ reduction.key       : chr "UMAP_"
  .. ..$ FindNeighbors.RNA.umap  :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
  .. .. .. ..@ name       : chr "FindNeighbors.RNA.umap"
  .. .. .. ..@ time.stamp : POSIXct[1:1], format: "2022-03-08 01:58:04"
  .. .. .. ..@ assay.used : chr "RNA"
  .. .. .. ..@ call.string: chr "FindNeighbors(data, reduction = \"umap\", dims = 1:2)"
  .. .. .. ..@ params     :List of 17
  .. .. .. .. ..$ reduction      : chr "umap"
  .. .. .. .. ..$ dims           : int [1:2] 1 2
  .. .. .. .. ..$ assay          : chr "RNA"
  .. .. .. .. ..$ k.param        : num 20
  .. .. .. .. ..$ return.neighbor: logi FALSE
  .. .. .. .. ..$ compute.SNN    : logi TRUE
  .. .. .. .. ..$ prune.SNN      : num 0.0667
  .. .. .. .. ..$ nn.method      : chr "annoy"
  .. .. .. .. ..$ n.trees        : num 50
  .. .. .. .. ..$ annoy.metric   : chr "euclidean"
  .. .. .. .. ..$ nn.eps         : num 0
  .. .. .. .. ..$ verbose        : logi TRUE
  .. .. .. .. ..$ force.recalc   : logi FALSE
  .. .. .. .. ..$ do.plot        : logi FALSE
  .. .. .. .. ..$ graph.name     : chr [1:2] "RNA_nn" "RNA_snn"
  .. .. .. .. ..$ l2.norm        : logi FALSE
  .. .. .. .. ..$ cache.index    : logi FALSE
  .. ..$ FindClusters            :Formal class 'SeuratCommand' [package "SeuratObject"] with 5 slots
  .. .. .. ..@ name       : chr "FindClusters"
  .. .. .. ..@ time.stamp : POSIXct[1:1], format: "2022-03-08 01:59:04"
  .. .. .. ..@ assay.used : chr "RNA"
  .. .. .. ..@ call.string: chr "FindClusters(data, resolution = 0.08)"
  .. .. .. ..@ params     :List of 10
  .. .. .. .. ..$ graph.name      : chr "RNA_snn"
  .. .. .. .. ..$ modularity.fxn  : num 1
  .. .. .. .. ..$ resolution      : num 0.08
  .. .. .. .. ..$ method          : chr "matrix"
  .. .. .. .. ..$ algorithm       : num 1
  .. .. .. .. ..$ n.start         : num 10
  .. .. .. .. ..$ n.iter          : num 10
  .. .. .. .. ..$ random.seed     : num 0
  .. .. .. .. ..$ group.singletons: logi TRUE
  .. .. .. .. ..$ verbose         : logi TRUE
  ..@ tools       : list()
# Peek at the data
# (for this object, head() prints the first rows of the per-cell metadata)
head(dat)
                                                       orig.ident nCount_RNA
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT       exp1        346
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG       exp1        279
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA       exp1        586
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG       exp1        292
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA       exp1        335
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA       exp1        354
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA       exp1        287
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA       exp1        509
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT       exp1        438
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG       exp1        313
                                                       nFeature_RNA      rt_bc
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT          247 AAGTTGCCAT
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG          225 TTAATTATTG
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA          425 CAGGTATGGA
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG          202 AAGTAGTCAG
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA          243 TGAGGTAGAA
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA          272 TCGATATTGA
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA          221 TGCGCCAGAA
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA          376 ACGCCGTTCA
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT          282 GTTGAAGGAT
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG          227 TGATAGAACG
                                                                                                         cell
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG
                                                            time percent.mt
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT hrs_06_10  1.4450867
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG hrs_08_12  0.7168459
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA hrs_08_12  1.0238908
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG hrs_10_14  0.3424658
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA hrs_14_18  4.4776119
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA hrs_14_18  0.5649718
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA hrs_08_12  0.0000000
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA hrs_08_12  0.5893910
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT hrs_06_10  0.6849315
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG hrs_14_18  0.6389776
                                                       percent.ribo barn_dro
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT    4.3352601      371
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG    5.0179211      331
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA    6.4846416      630
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG    0.6849315      320
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA    5.6716418      386
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA    1.6949153      380
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA    3.4843206      307
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA    5.5009823      552
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT    0.4566210      492
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG   13.4185304      344
                                                       barn_hu barn_mus
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT       0        0
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG       4        5
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA       0        0
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG       3        0
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA       0        0
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA       0        1
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA       0        0
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA       2        1
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT       1        1
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG       2        0
                                                       RNA_snn_res.0.8
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT              16
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG               7
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA               4
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG              23
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA               8
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA               8
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA               6
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA               0
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT              13
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG              12
                                                       doublet_score
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT    0.05276064
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG    0.07219402
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA    0.09008910
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG    0.06585398
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA    0.08478501
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA    0.08257165
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA    0.06023494
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA    0.08149588
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT    0.04439834
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG    0.07449210
                                                       predicted_doublet
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT           Singlet
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG           Singlet
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA           Singlet
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG           Singlet
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA           Singlet
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA           Singlet
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA           Singlet
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA           Singlet
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT           Singlet
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG           Singlet
                                                       doublet_cluster
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT           FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG           FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA           FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG           FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA           FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA           FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA           FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA           FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT           FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG           FALSE
                                                       seurat_clusters
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT              21
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG              11
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA              11
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG              12
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA              11
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA              11
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA               9
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA              21
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT               9
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG              11
                                                       doublet_subcluster
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT              FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG              FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA              FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG              FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA              FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA              FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA              FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA              FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT              FALSE
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG              FALSE
                                                       RNA_snn_res.0.08
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT               21
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG               11
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA               11
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG               12
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA               11
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA               11
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA                9
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA               21
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT                9
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG               11
                                                       lasso_age  NNv1_age
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT  8.402794  7.885203
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG 11.597093  9.940381
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA 11.266422 10.062633
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG 12.544636 12.211529
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA 12.542527 13.739003
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA 13.458162 15.126904
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA  9.715162  9.000631
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA 12.520638 11.694070
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT 10.045072 10.022837
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG 10.338754 15.219664
                                                        NNv2_age lasso_shift
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT  8.119637  0.00000000
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG  9.959537  0.00000000
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA 11.156040  0.00000000
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG 13.830982  0.00000000
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA 12.519031 -1.45747285
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA 14.558554 -0.54183787
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA 11.355991  0.00000000
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA 13.233130  0.52063800
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT 10.513521  0.04507167
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG 14.577216 -3.66124596
                                                        NNv1_shift NNv2_shift
exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTGCCAT  0.00000000  0.0000000
exp1_idx1AGACCATATC_idx2AACGAGGCTA.ACGAGGTTTTTAATTATTG  0.00000000  0.0000000
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCAGAACCTCAGGTATGGA  0.00000000  0.0000000
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGAGAGATAAGTAGTCAG  0.00000000  0.0000000
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGGAGTTTTGAGGTAGAA -0.26099682 -1.4809694
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCGTCTCATTCGATATTGA  0.00000000  0.0000000
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CCTCCTGATTGCGCCAGAA  0.00000000  0.0000000
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTACGCCGTTCA  0.00000000  1.2331295
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTGTTGAAGGAT  0.02283669  0.5135212
exp1_idx1AGACCATATC_idx2AACGAGGCTA.CGAGATGCTTGATAGAACG  0.00000000  0.0000000
# Print dimensions of the object
# Rows = genes (features); columns = cells
dim(dat)
[1]  23932 547805

Metadata per cell

# Metadata of each cell: one row per cell, one column per metadata variable
glimpse(dat@meta.data)
Rows: 547,805
Columns: 24
$ orig.ident         <chr> "exp1", "exp1", "exp1", "exp1", "exp1", "exp1", "ex…
$ nCount_RNA         <dbl> 346, 279, 586, 292, 335, 354, 287, 509, 438, 313, 3…
$ nFeature_RNA       <int> 247, 225, 425, 202, 243, 272, 221, 376, 282, 227, 2…
$ rt_bc              <chr> "AAGTTGCCAT", "TTAATTATTG", "CAGGTATGGA", "AAGTAGTC…
$ cell               <chr> "exp1_idx1AGACCATATC_idx2AACGAGGCTA.AACCGCTGTAAGTTG…
$ time               <chr> "hrs_06_10", "hrs_08_12", "hrs_08_12", "hrs_10_14",…
$ percent.mt         <dbl> 1.4450867, 0.7168459, 1.0238908, 0.3424658, 4.47761…
$ percent.ribo       <dbl> 4.3352601, 5.0179211, 6.4846416, 0.6849315, 5.67164…
$ barn_dro           <int> 371, 331, 630, 320, 386, 380, 307, 552, 492, 344, 3…
$ barn_hu            <int> 0, 4, 0, 3, 0, 0, 0, 2, 1, 2, 4, 3, 0, 26, 0, 2, 0,…
$ barn_mus           <int> 0, 5, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, …
$ RNA_snn_res.0.8    <chr> "16", "7", "4", "23", "8", "8", "6", "0", "13", "12…
$ doublet_score      <dbl> 0.05276064, 0.07219402, 0.09008910, 0.06585398, 0.0…
$ predicted_doublet  <chr> "Singlet", "Singlet", "Singlet", "Singlet", "Single…
$ doublet_cluster    <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
$ seurat_clusters    <fct> 21, 11, 11, 12, 11, 11, 9, 21, 9, 11, 9, 11, 11, 21…
$ doublet_subcluster <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FA…
$ RNA_snn_res.0.08   <fct> 21, 11, 11, 12, 11, 11, 9, 21, 9, 11, 9, 11, 11, 21…
$ lasso_age          <dbl> 8.402794, 11.597093, 11.266422, 12.544636, 12.54252…
$ NNv1_age           <dbl> 7.885203, 9.940381, 10.062633, 12.211529, 13.739003…
$ NNv2_age           <dbl> 8.119637, 9.959537, 11.156040, 13.830982, 12.519031…
$ lasso_shift        <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, -1.…
$ NNv1_shift         <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, -0.…
$ NNv2_shift         <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, -1.…
# Tally cells by experiment of origin (`orig.ident`)
dplyr::count(dat@meta.data, orig.ident)
  orig.ident      n
1       exp1 182708
2      exp10 158594
3      exp11  75926
4      exp12  64254
5       exp2  23317
6       exp3  35098
7       exp4   7908
# Tally cells by collection time window (`time`)
dplyr::count(dat@meta.data, time)
        time     n
1  hrs_00_02 78890
2  hrs_01_03 56028
3  hrs_02_04 18951
4  hrs_03_07 62789
5  hrs_04_08 41484
6  hrs_06_10 42942
7  hrs_08_12 66230
8  hrs_10_14 33843
9  hrs_12_16 37139
10 hrs_14_18 29824
11 hrs_16_20 79685
# Tally cells by cluster assignment (`seurat_clusters`)
dplyr::count(dat@meta.data, seurat_clusters)
   seurat_clusters     n
1                0 41050
2                1 40470
3                2 37524
4                3 33726
5                4 32135
6                5 29773
7                6 28949
8                7 27662
9                8 27404
10               9 26887
11              10 25458
12              11 24903
13              12 24859
14              13 24162
15              14 21136
16              15 20130
17              16 16828
18              17 16617
19              18 14149
20              19 12636
21              20  7369
22              21  5894
23              22  5164
24              23  1984
25              24   936

Visualize clusters

# Draw the UMAP, writing each cluster's number on the plot
DimPlot(object = dat, label = TRUE)

Subset data 5 - 6 hr embryos

# Keep only cells whose NNv1-predicted age lies strictly between 5 and 6 hours
dat_5 <- subset(dat, subset = NNv1_age > 5 & NNv1_age < 6)
# Slim the subset down with DietSeurat() so it does not carry over data from
# the full object
dat_5 <- DietSeurat(dat_5)

Make new UMAP

After subsetting, the data need to be re-scaled, re-clustered, etc. before a new UMAP can be created. See the functions below.

# Re-process the subset on its own, one step at a time:
# normalize, select variable features with the "vst" method, then scale and
# center
dat_5 <- NormalizeData(dat_5)
dat_5 <- FindVariableFeatures(dat_5, selection.method = "vst")
dat_5 <- ScaleData(dat_5)

# Run PCA on the variable features of dat_5
# `features` is the argument RunPCA() uses to choose its input genes; the
#   older `pc.genes` name is not a RunPCA() argument in current Seurat, so it
#   was not acting as the gene selector
# Output in dat_5@reductions$pca
dat_5 <- RunPCA(dat_5, 
                features = VariableFeatures(dat_5))

# UMAP embedding (2 components) computed from dimensions 1 to 50
# Stored in dat_5@reductions$umap
dat_5 <- RunUMAP(dat_5, dims = 1:50, n.neighbors = 10, n.components = 2)

# Build the nearest-neighbor graphs from the two UMAP dimensions
# Stored in dat_5@graphs$RNA_nn and dat_5@graphs$RNA_snn
# NOTE(review): neighbors are computed on the UMAP coordinates here, whereas
#   the integrated analysis later in this file uses reduction = "pca" --
#   confirm this is intended
dat_5 <- FindNeighbors(dat_5, reduction = "umap", dims = 1:2)

# Cluster the cells -- the nearest-neighbor graph must be built first
# Cluster assignments are written to the seurat_clusters metadata column
dat_5 <- FindClusters(dat_5, resolution = 0.08)

Plot new UMAP

# Plot the re-computed embedding of the subset, colored by the new clusters
DimPlot(object = dat_5)

Find all marker genes of each cluster

# Positive markers only, for every cluster of dat_5
dat_5_markers <- FindAllMarkers(
  dat_5,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)
# Keep the 10 markers with the largest avg_log2FC in each cluster
# (the result stays grouped by cluster)
top_10_markers <- slice_max(
  group_by(dat_5_markers, cluster),
  order_by = avg_log2FC,
  n = 10
)
# Show the 2 strongest markers per cluster, with the gene column first
top_10_markers %>%
  slice_max(order_by = avg_log2FC, n = 2) %>%
  select(gene, everything())
# A tibble: 14 × 7
# Groups:   cluster [7]
   gene                p_val avg_log2FC pct.1 pct.2 p_val_adj cluster
   <chr>               <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>  
 1 RpL23A          2.27e-125      1.23  0.6   0.347 5.43e-121 0      
 2 RpL6            2.41e-124      1.23  0.608 0.359 5.77e-120 0      
 3 CadN            1.38e- 55      2.08  0.284 0.153 3.30e- 51 1      
 4 mt:srRNA        4.81e- 74      1.58  0.467 0.311 1.15e- 69 1      
 5 lncRNA:Hsromega 0              1.99  0.891 0.523 0         2      
 6 Antp            1.10e- 69      1.84  0.376 0.207 2.63e- 65 2      
 7 hpRNA:CR46342   1.25e-180      0.936 0.409 0.087 3.00e-176 3      
 8 RpL17           2.71e-202      0.934 0.662 0.197 6.49e-198 3      
 9 Pde9            0              5.11  0.494 0.017 0         4      
10 apolpp          8.71e-113      4.40  0.537 0.145 2.08e-108 4      
11 spri            1.43e-226      3.62  0.613 0.074 3.43e-222 5      
12 Pdp1            1.13e-182      3.32  0.666 0.116 2.71e-178 5      
13 Gapdh2          7.11e-172      2.79  0.271 0.013 1.70e-167 6      
14 bru1            1.91e- 43      2.17  0.25  0.049 4.56e- 39 6      

Differential expression analysis

Trying differential expression in 5 hr embryos vs. 10 hr embryos.

Integrate the data sets

# Two narrow NNv1 age windows: 5.45 - 5.55 hr and 10.1 - 10.9 hr
# Each subset is slimmed with DietSeurat()
# Note: this replaces the 5 - 6 hr `dat_5` object created earlier
dat_5 <- subset(dat, subset = NNv1_age > 5.45 & NNv1_age < 5.55)
dat_5 <- DietSeurat(dat_5)
dat_10 <- subset(dat, subset = NNv1_age > 10.1 & NNv1_age < 10.9)
dat_10 <- DietSeurat(dat_10)

# Find anchors between the two subsets, then integrate them (dims 1 to 20)
dat_anchors <- FindIntegrationAnchors(
  object.list = list(dat_5, dat_10),
  dims = 1:20
)
dat_comb <- IntegrateData(anchorset = dat_anchors, dims = 1:20)

Perform an integrated analysis

# Work on the "integrated" assay from here on
DefaultAssay(dat_comb) <- "integrated"

# Standard workflow for visualization and clustering: scale, then PCA (30 PCs)
dat_comb <- ScaleData(object = dat_comb, verbose = FALSE)
dat_comb <- RunPCA(object = dat_comb, npcs = 30, verbose = FALSE)

# UMAP, neighbor graph and clustering, all based on PCs 1 to 20
dat_comb <- RunUMAP(object = dat_comb, dims = 1:20, reduction = "pca")
dat_comb <- FindNeighbors(object = dat_comb, dims = 1:20, reduction = "pca")
dat_comb <- FindClusters(object = dat_comb, resolution = 0.5)

# Label each cell with its age group for plotting:
# "5" when NNv1_age < 6, otherwise "10"
# The labels are stored in the `time` metadata column
# (unclear whether this grouping is already stored somewhere else)
dat_comb@meta.data$time <- ifelse(dat_comb@meta.data$NNv1_age < 6, "5", "10")

# Embedding colored by age group
DimPlot(dat_comb, group.by = "time")

# Embedding colored by cluster, with cluster labels drawn on the plot
DimPlot(dat_comb, label = TRUE)

# Embedding split into one panel per age group
DimPlot(dat_comb, split.by = "time")

Counts

# Tally cells of the integrated object by age group (`time`)
dplyr::count(dat_comb@meta.data, time)
  time    n
1   10 7579
2    5 8193
# Tally cells of the integrated object by cluster (`seurat_clusters`)
dplyr::count(dat_comb@meta.data, seurat_clusters)
   seurat_clusters    n
1                0 4792
2                1 2960
3                2 2305
4                3 1432
5                4  702
6                5  661
7                6  609
8                7  584
9                8  536
10               9  496
11              10  354
12              11  291
13              12   28
14              13   22
# Cross-tabulate cells: one row per cluster, one column per age group
pivot_wider(
  dplyr::count(dat_comb@meta.data, time, seurat_clusters),
  names_from = time,
  values_from = n
)
# A tibble: 14 × 3
   seurat_clusters  `10`   `5`
   <fct>           <int> <int>
 1 0                2661  2131
 2 1                 880  2080
 3 2                 439  1866
 4 3                 911   521
 5 4                 236   466
 6 5                 390   271
 7 6                 440   169
 8 7                 317   267
 9 8                 379   157
10 9                 357   139
11 10                256    98
12 11                279    12
13 12                 20     8
14 13                 14     8
# Dodged bar chart of the number of cells per cluster, one bar per age group
# (age group "5" is ordered before "10")
dat_comb@meta.data %>%
  dplyr::count(time, seurat_clusters) %>%
  mutate(time = fct_relevel(time, "5", "10")) %>%
  ggplot(aes(x = seurat_clusters, y = n, fill = time)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("#888888", "#444444")) +
  theme_minimal()

Differential expression between clusters

# Set default assay back to RNA
DefaultAssay(dat_comb) <- "RNA"

# Find the conserved markers between conditions ----

# For every cluster, find the markers conserved across the groups of the
# `time` metadata column (5 hr and 10 hr embryos).
# FindConservedMarkers() returns the genes as row names, and bind_rows() does
# not keep row names intact once the same gene shows up for several clusters,
# so the gene names are copied into an explicit `gene` column first.
cluster_ids <- levels(dat_comb$seurat_clusters)
markers_all <- lapply(cluster_ids, function(cluster_id) {
  marker <- FindConservedMarkers(dat_comb,
                                 ident.1 = cluster_id,
                                 grouping.var = "time",
                                 verbose = FALSE)
  tibble::rownames_to_column(marker, var = "gene")
})
# Name the list by cluster so bind_rows() can fill in the `cluster` column
names(markers_all) <- cluster_ids

# Combine into a single data frame
dat_comb_cluster_degs <- bind_rows(markers_all, .id = "cluster")
# Number of DEGs found for each cluster, largest first
dat_comb_cluster_degs %>%
  group_by(cluster) %>%
  summarise(n = n()) %>%
  arrange(desc(n))
# A tibble: 14 × 2
   cluster     n
   <chr>   <int>
 1 4        1321
 2 12        533
 3 13        528
 4 5         413
 5 11        364
 6 6         321
 7 1         187
 8 8         148
 9 0         126
10 10         55
11 3          42
12 2          24
13 9          20
14 7          13

Annotation of cell types and tissues

For cluster annotation, we used the Berkeley Drosophila Genome Project (BDGP) database, which includes gene expression patterns of approximately 8600 genes in Drosophila staged embryos as detected by in situ hybridization (20, 21, 58). The BDGP database gives a stage-specific expression pattern (“term”) for each tested gene during embryogenesis. We used Fisher’s test to look for enrichment of BDGP gene expression terms in each cluster’s marker genes. Top ten terms per cluster were examined. To pick a specific term out of the top ten, we further examined the BDGP terms of the top 20 marker genes for each cluster.

However, there do seem to be resources out there to automate the annotation process. For example, SingleR.

Annotations from Calderon RNA-Seq data

# Load the per-cell metadata table that carries the annotations,
# then list its columns
rna_meta <- here::here("calderon/data/rna_meta.rds") %>%
  readRDS()
colnames(rna_meta)
 [1] "inferred_time_window" "seurat_clusters"      "experiment"          
 [4] "nCount_RNA"           "nFeature_RNA"         "cell"                
 [7] "time"                 "percent.mt"           "percent.ribo"        
[10] "doublet_score"        "lasso_age"            "NNv1_age"            
[13] "NNv2_age"             "lasso_shift"          "NNv1_shift"          
[16] "NNv2_shift"           "manual_annot"         "germ_layer"          
[19] "hex"                  "hex2"                
# Columns present in rna_meta but absent from the Seurat object's metadata
setdiff(x = colnames(rna_meta), y = colnames(dat@meta.data))
[1] "inferred_time_window" "experiment"           "manual_annot"        
[4] "germ_layer"           "hex"                  "hex2"                
# Distinct germ layer labels
unique(rna_meta[["germ_layer"]])
 [1] "maternal"          ""                  "neuroectoderm"    
 [4] "ectoderm"          "endoderm"          "extra-embryonic"  
 [7] "mesoderm"          "unknown"           "blastoderm / pole"
[10] "germ cell"        
# Distinct manual annotation labels
unique(rna_meta[["manual_annot"]])
 [1] "maternal"                            
 [2] "germ cell"                           
 [3] "brain"                               
 [4] "epidermis"                           
 [5] "midgut"                              
 [6] "yolk nuclei"                         
 [7] "amnioserosa"                         
 [8] "somatic muscle"                      
 [9] "tracheal system"                     
[10] "salivary gland"                      
[11] "plasmatocytes"                       
[12] "sensory nervous system primordium"   
[13] "muscle"                              
[14] "hindgut / anal pad"                  
[15] "glia"                                
[16] "fat body"                            
[17] "crystal cell"                        
[18] "ventral nerve cord"                  
[19] "proventriculus"                      
[20] "foregut / hindgut / anal pad"        
[21] "sensory nervous system"              
[22] "visceral muscle"                     
[23] "spiracles (tracheal system)"         
[24] "Malpighian tubule"                   
[25] "ubiquitous"                          
[26] "spiracle (tracheal system)"          
[27] "ectoderm anlage in statu nascendi"   
[28] "cellular blastoderm / pole cell"     
[29] "anlage in statu nascendi"            
[30] "ectoderm anlage"                     
[31] "ectoderm primordium"                 
[32] "mesoderm primordium"                 
[33] "peripheral nervous system primordium"
[34] "endoderm primordium"                 
[35] "ventral nerve cord primordium"       
[36] "tracheal primordium"                 
[37] "foregut / hindgut primordium"        
[38] "epidermis primordium"                
[39] "brain primordium"                    
[40] "trunk mesoderm primordium"           
[41] "visceral muscle primordium"          
[42] "midgut primordium"                   
[43] "salivary gland primordium"           
[44] "unknown"                             
[45] "midline primordium"                  
[46] "somatic muscle primordium"           
[47] "peripheral nervous system"           
[48] "hindgut primordium"                  
[49] "procrystal cell"                     
[50] "plasmatocytes anlage"                
[51] "salivary gland body primordium"      

SingleR for annotation of cells

# Establish a reference object from the Calderon data

# Add the rna_meta annotations from the Calderon data to the Seurat object
# Rows are aligned on the shared `cell` ID column instead of by position, so
# the labels stay attached to the right cells even if the two tables are
# ordered differently
annot_rows <- match(dat@meta.data$cell, rna_meta$cell)
if (anyNA(annot_rows)) {
  stop(sum(is.na(annot_rows)), " cells in `dat` have no entry in `rna_meta`",
       call. = FALSE)
}
dat@meta.data$manual_annot <- rna_meta$manual_annot[annot_rows]
dat@meta.data$germ_layer <- rna_meta$germ_layer[annot_rows]
# Create reference data set -- use just the embryos with an NNv1 age between
# 7 and 8 hours to annotate
# The subset is slimmed with DietSeurat() and converted to a
# SingleCellExperiment object
ref_sce <- as.SingleCellExperiment(DietSeurat(subset(dat, NNv1_age > 7 & 
                                                       NNv1_age < 8)))
ref_sce <- scuttle::logNormCounts(ref_sce)
# Convert to a single cell experiment object for annotations
sce_comb <- as.SingleCellExperiment(DietSeurat(dat_comb))

# Create logNormCounts for data
sce_comb <- scuttle::logNormCounts(sce_comb)

Create annotation labels for each cell with SingleR.

# Label every cell of the combined data with SingleR, using the manual
# annotations of the reference cells as the label set
annot <- SingleR(
  test = sce_comb,
  ref = ref_sce,
  labels = ref_sce$manual_annot,
  assay.type.test = 1
)

Check annotations

# Annotation diagnostics
plotScoreHeatmap(annot)

# Set the annotations back in the Seurat object for later
# NOTE(review): positional assignment -- assumes `annot` has one row per cell
#   of dat_comb, in the same order; confirm
dat_comb@meta.data$annot <- annot$pruned.labels
# Count the number of cells in each annotation for each age group,
# sorted by the count in the 5 hr group
# n = Inf prints every row whatever the number of annotations, instead of
#   relying on a hard-coded row count
dat_comb@meta.data %>%
  group_by(time, annot) %>%
  tally() %>%
  pivot_wider(values_from = n,
              names_from = time) %>%
  arrange(desc(`5`)) %>%
  select(annot, `5`, `10`) %>%
  print(n = Inf)
# A tibble: 37 × 3
   annot                                  `5`  `10`
   <chr>                                <int> <int>
 1 yolk nuclei                           2715   794
 2 visceral muscle                        711   205
 3 glia                                   682  1145
 4 hindgut primordium                     620    37
 5 crystal cell                           544   849
 6 foregut / hindgut / anal pad           525   658
 7 sensory nervous system primordium      397   324
 8 visceral muscle primordium             351   550
 9 Malpighian tubule                      321    30
10 ventral nerve cord                     305   816
11 fat body                               198   676
12 hindgut / anal pad                     191   289
13 muscle                                 168   164
14 ventral nerve cord primordium          101    25
15 tracheal primordium                     79    28
16 spiracle (tracheal system)              72   577
17 epidermis primordium                    48     1
18 ectoderm anlage                         26    15
19 amnioserosa                             25    16
20 spiracles (tracheal system)             24    NA
21 anlage in statu nascendi                17   149
22 plasmatocytes                           12    41
23 mesoderm primordium                     11     8
24 ectoderm anlage in statu nascendi        8     3
25 midgut primordium                        7     9
26 salivary gland                           7    25
27 cellular blastoderm / pole cell          4     3
28 midgut                                   4    23
29 somatic muscle                           4    11
30 somatic muscle primordium                4    11
31 proventriculus                           3    63
32 <NA>                                     3     4
33 peripheral nervous system                2    24
34 sensory nervous system                   2     1
35 brain primordium                         1     1
36 peripheral nervous system primordium     1     2
37 epidermis                               NA     2
# Embedding colored by annotation, one panel per age group (5 and 10 hr)
DimPlot(dat_comb, split.by = "time", group.by = "annot")